knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
library(pals)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggpubr)
library("readxl")
library(reshape)
##
## Attaching package: 'reshape'
##
## The following object is masked from 'package:lubridate':
##
## stamp
##
## The following object is masked from 'package:dplyr':
##
## rename
##
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
library(ggplotify)
library(pheatmap)
library(ggh4x)
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:reshape':
##
## stamp
##
## The following object is masked from 'package:ggpubr':
##
## get_legend
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(ggside)
## Registered S3 method overwritten by 'ggside':
## method from
## +.gg ggplot2
library(enrichR)
## Welcome to enrichR
## Checking connection ...
## Enrichr ... Connection is Live!
## FlyEnrichr ... Connection is Live!
## WormEnrichr ... Connection is Live!
## YeastEnrichr ... Connection is Live!
## FishEnrichr ... Connection is Live!
## OxEnrichr ... Connection is Live!
library(ggmagnify)
library(ggrepel)
library(smplot2)
## Updated tutorial for smplot: smin95.github.io/dataviz/
library(Seurat)
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, were retired in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## Attaching SeuratObject
library(RColorBrewer)
library(dplyr)
library(ggbreak)
## ggbreak v0.1.2
##
## If you use ggbreak in published research, please cite the following
## paper:
##
## S Xu, M Chen, T Feng, L Zhan, L Zhou, G Yu. Use ggbreak to effectively
## utilize plotting space to deal with large datasets and outliers.
## Frontiers in Genetics. 2021, 12:774846. doi: 10.3389/fgene.2021.774846
library(patchwork)
##
## Attaching package: 'patchwork'
##
## The following object is masked from 'package:cowplot':
##
## align_plots
`%!in%` = Negate(`%in%`)
my_pal <- c("#E7E7E0","#A2A187", "#525240", "#03878F", "#F2B531","#ED5958","#68C6A4", "#113245")
save_figures <- TRUE
## create directories
if(save_figures){
dir.create(path = "./local/panels/Final/Main/1/",recursive = TRUE, showWarnings = FALSE)
dir.create(path = "./local/panels/Final/Main/2/",recursive = TRUE, showWarnings = FALSE)
dir.create(path = "./local/panels/Final/Main/3/",recursive = TRUE, showWarnings = FALSE)
dir.create(path = "./local/panels/Final/Supplementary/1/",recursive = TRUE, showWarnings = FALSE)
dir.create(path = "./local/panels/Final/Reviewer/1/",recursive = TRUE, showWarnings = FALSE)
}
Figure 1 b
Competition_df <- read.table("data/NeurIPS_results.csv", header = T, sep = ",")
Competition_df <- cbind.data.frame(RMSE = Competition_df$GEX2ADT, Category = "Competition")
Competition_df <- filter(Competition_df, RMSE < 0.7)
test_results <- read_excel("data/neurips-testset.xls")
test_results <- as.data.frame(test_results)
test_results[c('Method', 'Seed')] <- str_split_fixed(test_results$Meth, '-', 2)
sclin <- test_results %>% group_by(Method) %>% dplyr::summarise(GEX2ADT = mean(RMSE))
sclin <- sclin[,2:1]
names(sclin) <- colnames(Competition_df)
df <- rbind(Competition_df, sclin)
df <- df[order(df$RMSE, decreasing = F),]
df$Rank <- 1:nrow(df)
df$Category <- factor(df$Category, levels = c("Babel_Dance","KRR_new","Vanilla_NN","ScLinear"))
ggplot(df, aes(x=Rank, y=RMSE, col=Category)) + geom_point(size=2) + theme_classic2() +
scale_color_manual(values = c(my_pal)) + scale_fill_manual(values = c(my_pal)) +theme(legend.position = "none") + theme(legend.title = element_blank()) + theme(legend.text = element_text(size = 8)) +
geom_label_repel(nudge_y = 0.05,label = ifelse(!is.na(df$Category), as.character(df$Category),"")) +
ylim(0.3,0.55)
## Warning: Removed 2 rows containing missing values (`geom_label_repel()`).

if(save_figures){
ggsave("local/panels/Final/Main/1/Figure.1.b.r2.pdf", width = 20/3, height = 29/4, units = 'cm')
write.table(df, "data/Figure.1.b.csv", sep = ",", col.names = TRUE, row.names = FALSE)
}
## Warning: Removed 2 rows containing missing values (`geom_label_repel()`).
Figure 1 c
my_data <- read_excel("local/time-memory2.xls",sheet = 2, )
scLinear <- melt(as.data.frame(my_data), id.vars = "time")
scLinear[c('Method', 'Seed')] <- str_split_fixed(scLinear$variable, '-', 2)
my_data <- read_excel("local/time-memory2.xls",sheet = 1)
Other <- na.omit(melt(as.data.frame(my_data), id.vars = "time"))
Other[c('Method', 'Seed')] <- str_split_fixed(Other$variable, '-', 2)
Time_RAM <- rbind(scLinear, Other)
Time <- Time_RAM %>% group_by(Method, Seed) %>% slice(which.max(time))
Time_sum <- Time %>% group_by(Method) %>% dplyr::summarise(Mean = mean(time), Sd = sd(time))
Time_sum$Method <- factor(Time_sum$Method, levels = c("Babel_Dance","KRR_new","Vanilla_NN","ScLinear"))
ggplot(Time_sum, aes(x=Method, y=Mean, fill=Method)) + geom_bar(stat="identity") + theme_classic2() +
scale_fill_manual(values = my_pal) + theme(legend.position = "none") + ylab("Time (sec)") +
theme(axis.text.x = element_text(angle = 90)) +
geom_errorbar( aes(x=Method, ymin=Mean-Sd, ymax=Mean+Sd), width=0.2, colour="black", alpha=0.8) +
geom_point(data = Time, aes(x=Method, y=time), size = 0.8)

if(save_figures){ggsave("local/panels/Final/Main/1/Figure.1.c.r2.pdf", width = 20/3, height = 29/4, units = 'cm')
write.table(Time_sum, "data/Figure.1.c.csv", sep = ",", col.names = TRUE, row.names = FALSE)
}
Figure 2 a
PBMC <- read.table("./local/PBMC/bit_table_pbmc10k_2.csv", header = T, sep=',')
PBMC <- PBMC %>% dplyr::select(-c(rna_normalized, rna_raw))
# de-duplicate the separation of complexes that was used for RNA/ADT comparison
map <- list(
list(gexp = c("CD3E", "CD3D", "CD3G"), adt = c("CD3")),
list(gexp = c("CD8A", "CD8B"), adt = c("CD8"))
)
for (m in map){
print(paste0("gexp: ", paste0(m$gexp, collapse = ","), " adt: ", paste0(m$adt, collapse = ",")))
gexp_names <- m$gexp
adt_name <- m$adt
for(gexp_name in gexp_names){
PBMC$gene[PBMC$gene == gexp_name] <- adt_name
}
}
## [1] "gexp: CD3E,CD3D,CD3G adt: CD3"
## [1] "gexp: CD8A,CD8B adt: CD8"
PBMC <- PBMC %>% distinct()
PBMC <- na.omit(PBMC)
cors <- PBMC %>% group_by(gene) %>% dplyr::summarise(correlation = cor(real, predicted_sclinear))
ggplot(cors, aes(x=reorder(gene, correlation), y= correlation,fill=correlation)) + geom_bar(stat="identity", col="black") + coord_flip() +
theme_classic2() + scale_fill_gradientn(colours = inferno(11)) + ylab("Pearson\n(Real vs ScLinear)") + xlab("Protein") + theme(legend.position = "none") +
ggtitle("PBMC CITE-seq\n(10k cells)") + theme(plot.title = element_text(hjust = 0.5))

if(save_figures){ggsave("local/panels/Final/Main/1/Figure.2.a.r2.pdf", width = 20/3, height = 29/3, units = 'cm')
write.table(cors, "data/Figure.2.a.csv", sep = ",", col.names = TRUE, row.names = FALSE)
}
Figure 3 c
p1 <- ggplot(df[which(df$gene %in% c("CD19")),], aes(x=predicted, y=real, col=scMRMA_level_2)) + geom_density2d() + stat_cor(inherit.aes = F, aes(x=predicted, y=real)) + theme_classic2() +
scale_fill_manual(values = kelly()[c(7,5,4,6)]) + facet_wrap(~gene)+ theme(legend.position = "top") +
ylab("Real") + xlab("scLinear - Predicted") + scale_color_manual(values = my_pal[c(5,8)]) + geom_point(alpha=0.5,size=0.5) +
scale_y_break(c(2.1,3.5)) + scale_y_break(c(3.7,4.3)) + theme(axis.title = element_blank()) + theme(legend.title = element_blank()) +
scale_y_continuous(breaks = c(0,0.5,1,1.5,2,2.5, 3, 3.5, 3.7, 4.5)) +
theme(
axis.text.y.right = element_blank(),
axis.line.y.right = element_blank(),
axis.ticks.y.right = element_blank()
)
p2 <- ggplot(df[which(df$gene %in% c("CD3")),], aes(x=predicted, y=real, col=scMRMA_level_2)) + geom_density2d() + stat_cor(inherit.aes = F, aes(x=predicted, y=real)) + theme_classic2() +
scale_fill_manual(values = kelly()[c(7,5,4,6)]) + facet_wrap(~gene) + theme(legend.position = "top") +
ylab("Real") + xlab("scLinear - Predicted")+ scale_color_manual(values = my_pal[c(5,8)]) + geom_point(alpha=0.5,size=0.5) +
theme(axis.title = element_blank())+ theme(legend.title = element_blank()) +
scale_y_continuous(breaks = c(0,0.5,1,1.5,2,2.5, 3, 3.5, 4, 4.5)) +
theme(
axis.text.y.right = element_blank(),
axis.line.y.right = element_blank(),
axis.ticks.y.right = element_blank()
) + theme(axis.text.y = element_text(margin = margin(t = 0, r = 0, b = 0, l = 10))) + theme(plot.margin = unit(c(0,0.5,0,0.2), units = "cm"))
p2 <- p2 + theme(legend.position = "none")
p1 <- p1 + theme(legend.position = "none")
p2 / p1

if(save_figures){ggsave("local/panels/Final/Main/3/Figure.3.c.r2.pdf", width = 20/3, height = (29/3*2), units = "cm")
write.table(df, "data/Figure.3.c.csv", sep = ",", col.names = TRUE, row.names = FALSE)}
Figure 3 d
min_cutoff <- "q10"
max_cutoff <- "q90"
DefaultAssay(sobj) <- "RNA"
p1 <- SpatialFeaturePlot(sobj, features = c("CD19"), min.cutoff = min_cutoff, max.cutoff = max_cutoff, images = "hires", stroke = 0,pt.size.factor = 6) +
theme(legend.position = "bottom") + ggtitle("RNA") + theme(plot.title = element_text(size=10, hjust = 0.5)) +
theme(legend.title = element_blank()) + scale_fill_gradientn(colours = inferno(11)) + theme(legend.position = "none")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
DefaultAssay(sobj) <- "ADT"
p2 <- SpatialFeaturePlot(sobj, features = c("CD19"), min.cutoff = min_cutoff, max.cutoff = max_cutoff, images = "hires", stroke = 0,pt.size.factor = 6) +
theme(legend.position = "bottom") + ggtitle("ADT") + theme(plot.title = element_text(size=10, hjust = 0.5))+
theme(legend.title = element_blank()) + scale_fill_gradientn(colours = inferno(11)) + theme(legend.position = "none")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
DefaultAssay(sobj) <- "pred_ADT"
p3 <- SpatialFeaturePlot(sobj, features = c("CD19"), min.cutoff = min_cutoff, max.cutoff = max_cutoff, images = "hires", stroke = 0,pt.size.factor = 6) +
theme(legend.position = "bottom") + ggtitle("ScLinear") + theme(plot.title = element_text(size=10, hjust = 0.5))+
theme(legend.title = element_blank()) + scale_fill_gradientn(colours = inferno(11)) + theme(legend.position = "none")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
DefaultAssay(sobj) <- "RNA"
p1.2 <- SpatialFeaturePlot(sobj, features = c("CD3E"), min.cutoff = min_cutoff, max.cutoff = max_cutoff, images = "hires", stroke = 0,pt.size.factor = 6) +
theme(legend.position = "bottom") + ggtitle("RNA") + theme(plot.title = element_text(size=10, hjust = 0.5)) +
theme(legend.title = element_blank()) + scale_fill_gradientn(colours = inferno(11)) + theme(legend.position = "none")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
DefaultAssay(sobj) <- "ADT"
p2.2 <- SpatialFeaturePlot(sobj, features = c("CD3"), min.cutoff = min_cutoff, max.cutoff = max_cutoff, images = "hires", stroke = 0,pt.size.factor = 6) +
theme(legend.position = "bottom") + ggtitle("ADT") + theme(plot.title = element_text(size=10, hjust = 0.5))+
theme(legend.title = element_blank()) + scale_fill_gradientn(colours = inferno(11)) + theme(legend.position = "none")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
DefaultAssay(sobj) <- "pred_ADT"
p3.2 <- SpatialFeaturePlot(sobj, features = c("CD3"), min.cutoff = min_cutoff, max.cutoff = max_cutoff, images = "hires", stroke = 0,pt.size.factor = 6) +
theme(legend.position = "bottom") + ggtitle("ScLinear") + theme(plot.title = element_text(size=10, hjust = 0.5))+
theme(legend.title = element_blank()) + scale_fill_gradientn(colours = inferno(11)) + theme(legend.position = "none")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
p1<- ggarrange(p1,p2,p3, nrow = 1, ncol = 3)
p2<- ggarrange(p1.2,p2.2,p3.2, nrow = 1, ncol = 3)
p <- ggarrange(p2,p1, nrow = 2)
p

if(save_figures){ggsave(plot = p, "local/panels/Final/Main/3/Figure.3.d.r2.pdf", height = 29/3, width = 20/3*2, units = "cm")}
Figure 3 e
pbmc <- read.table("local/PBMC/feature_importance_pbmc10k.txt", header = T)
cd3_cd19_pbmc <- pbmc[c("CD3","CD19"),]
rownames(cd3_cd19_pbmc) <- paste0(rownames(cd3_cd19_pbmc),"_PBMC")
tonsil <- read.table("local/Visium/feature_importance_tonsil.txt", header = T)
cd3_cd19_tonsil <- tonsil[c("CD3","CD19"),]
rownames(cd3_cd19_tonsil) <- paste0(rownames(cd3_cd19_tonsil),"_Tonsil")
cd3_cd19 <- rbind(cd3_cd19_pbmc, cd3_cd19_tonsil)
cd3_cd19 <- t(cd3_cd19)
cd3_cd19 <- cd3_cd19[order(cd3_cd19[,1], decreasing = T),]
cd3_pbmc <- rownames(cd3_cd19)[order(cd3_cd19[,1], decreasing = T)[1:20]]
cd19_pbmc <- rownames(cd3_cd19)[order(cd3_cd19[,2], decreasing = T)[1:20]]
cd3_tonsil <- rownames(cd3_cd19)[order(cd3_cd19[,3], decreasing = T)[1:20]]
cd19_tonsil <- rownames(cd3_cd19)[order(cd3_cd19[,4], decreasing = T)[1:20]]
genes <- unique(c(cd3_tonsil, cd19_tonsil))
myColor <- colorRampPalette(rev(brewer.rdbu(11)))(11)
myBreaks <- c(seq(min(cd3_cd19[genes,3:4]), 0, length.out=ceiling(11/2) + 1),
seq(max(cd3_cd19[genes,3:4])/11, max(cd3_cd19[genes,3:4]), length.out=floor(11/2)))
pheatmap(t(cd3_cd19[genes,3:4]), color=myColor, breaks=myBreaks, legend = F, cluster_rows = F, treeheight_col = 1,
border_color = "black", fontsize_col = 8, labels_row = c("CD3","CD19"))

if(save_figures){ggsave("local/panels/Final/Main/3/Figure.3.e.r2.pdf", height = (29/3) - 2, width = (20/3*1.6) , units = "cm")
write.table(cd3_cd19[genes,3:4], "data/Figure.3.e.csv", sep = ",", col.names = TRUE, row.names = FALSE)}
Figure 3 f
go <- enrichr(cd3_tonsil, databases = "GO_Biological_Process_2023")
## Uploading data to Enrichr... Done.
## Querying GO_Biological_Process_2023... Done.
## Parsing results... Done.
go <- go$GO_Biological_Process_2023
go <- go[1:5,]
ggplot(go, aes(x=reorder(Term, Adjusted.P.value), y=-log10(Adjusted.P.value))) +
geom_bar(stat="identity", col="black", fill=rev(brewer.rdbu(11))[11]) +
theme_classic2() + xlab("") + theme(axis.text.x = element_text(angle=90, hjust = 1)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 30)) + ylab("-log10(Adj.Pvalue)")

go2 <- enrichr(cd19_tonsil,databases = "GO_Biological_Process_2023")
## Uploading data to Enrichr... Done.
## Querying GO_Biological_Process_2023... Done.
## Parsing results... Done.
go2 <- go2$GO_Biological_Process_2023
go2 <- go2[1:5,]
ggplot(go2, aes(x=reorder(Term, Adjusted.P.value), y=-log10(Adjusted.P.value))) +
geom_bar(stat="identity", col="black", fill=rev(brewer.rdbu(11))[11]) +
theme_classic2() + xlab("") + theme(axis.text.x = element_text(angle=90, hjust = 1)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 30)) + ylab("-log10(Adj.Pvalue)")

go$Marker <- "CD3"
go2$Marker <- "CD19"
go <- rbind(go, go2)
strip <- strip_themed(background_x = elem_list_rect(fill = my_pal[c(5,8)]), text_x = element_text(color="white"))
ggplot(go, aes(x=reorder(Term, Adjusted.P.value), y=-log10(Adjusted.P.value))) +
geom_bar(stat="identity", col="black", fill="white") +
theme_classic2() + xlab("") + theme(axis.text.x = element_text(angle=90, hjust = 1)) +
scale_x_discrete(labels = function(x) str_wrap(x, width = 30)) + ylab("-log10(Adj.Pvalue)") +
facet_wrap2(~Marker, scales = "free", strip = strip) +
scale_fill_manual(values = my_pal[c(5,8)]) + theme(legend.position = "none") + theme(axis.text.x = element_text(vjust=0.5))

if(save_figures){ggsave("local/panels/Final/Main/3/Figure.3.f.r2.pdf", height = 29/3, width = 20/3*1.5, units = "cm")
write.table(go, "data/Figure.3.f.csv", sep = ",", col.names = TRUE, row.names = FALSE)}
Figure 2 c
local_pal <- c("#525240", "#03878F")
dropout_res <- readRDS("./local/dropout_res_pbmc10k.rds")
DF <- dropout_res%>% group_by(cell, dropout, type) %>% summarise(Pearson = cor(real, predicted_adt))
## `summarise()` has grouped output by 'cell', 'dropout'. You can override using
## the `.groups` argument.
DF$Pearson <- as.numeric(DF$Pearson )
DF$type [!(DF$type == "original")]<- "added_dropout"
pos <- position_jitter(width = 0, height = 0, seed = 6)
p <- ggplot(DF, aes(x = dropout, y = Pearson, color = type, group = dropout)) +
geom_boxplot(show.legend = FALSE, outlier.size = 0.2) + theme_bw() +
#geom_jitter(position=pos, size = 0.8) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
scale_fill_manual(values = local_pal) +
ylab("Pearson\n(Real vs ScLinear)") + xlab("") +
theme(legend.position = "none") +
scale_color_manual(values = c(local_pal)) + scale_fill_manual(values = c(local_pal)) +
theme(legend.position = "none") +
theme(legend.title = element_blank()) +
theme(legend.text = element_text(size = 8)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ylab("Pearson") +
xlab("Dropout rate") + ggtitle("PBMC10K")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
p

if(save_figures){ggsave(plot = p, "local/panels/Final/Main/2/Figure.2.c.r2.pdf", width = 20/2, height = 29/3, units = 'cm')
write.table(DF, "data/Figure.2.c.csv", sep = ",", col.names = TRUE, row.names = FALSE)}
Supplementary Figure 1 c
local_pal <- c("#525240", "#03878F")
dropout_res <- readRDS("local/dropout_res_pbmc5k.rds")
DF <- dropout_res%>% group_by(cell, dropout, type) %>% summarise(Pearson = cor(real, predicted_adt))
## `summarise()` has grouped output by 'cell', 'dropout'. You can override using
## the `.groups` argument.
DF$Pearson <- as.numeric(DF$Pearson )
DF$type [!(DF$type == "original")]<- "added_dropout"
pos <- position_jitter(width = 0, height = 0, seed = 6)
p <- ggplot(DF, aes(x = dropout, y = Pearson, color = type, group = dropout)) +
geom_boxplot(show.legend = FALSE, outlier.size = 0.2) + theme_bw() +
#geom_jitter(position=pos, size = 0.8) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
scale_fill_manual(values = local_pal) +
ylab("Pearson\n(Real vs ScLinear)") + xlab("") +
theme(legend.position = "none") +
scale_color_manual(values = c(local_pal)) + scale_fill_manual(values = c(local_pal)) +
theme(legend.position = "none") +
theme(legend.title = element_blank()) +
theme(legend.text = element_text(size = 8)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ylab("Pearson") +
xlab("Dropout rate") + ggtitle("PBMC5K")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
p

if(save_figures){ggsave(plot = p, "local/panels/Final/Main/2/Supplementary.Figure.1.c.r2.pdf", width = 20/2, height = 29/3, units = 'cm')
write.table(DF, "data/Supplementary.Figure.1.c.csv", sep = ",", col.names = TRUE, row.names = FALSE)}
Figure 2 d
DF <- readRDS("local/mls_other_species.rds")
DF$tissue <- NA
DF$tissue[grepl(DF$train_test, pattern = "LymphNode")] <- "Lymph Node"
DF$tissue[grepl(DF$train_test, pattern = "Spleen")] <- "Spleen"
df <- DF %>% dplyr::select(cell, gene, real, predicted_adt, cell_type_2, train_test, tissue) %>% group_by(gene, train_test, tissue) %>% summarise(Pearson = cor(real, predicted_adt)) %>% ungroup()
## `summarise()` has grouped output by 'gene', 'train_test'. You can override
## using the `.groups` argument.
local_pal <- c("#A2A187", "#525240", "#68C6A4", "#03878F")
df$name <- ""
pos <- position_jitter(width = 0.3, height = 0, seed = 6)
ggplot(df, aes(x="", y = Pearson, fill = train_test, label = name)) +
geom_boxplot(show.legend = FALSE, outlier.size = 0, outlier.stroke = 0) +
theme_bw() +
geom_jitter(position=pos, size = 0.8) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
scale_fill_manual(values = local_pal) +
ylab("Pearson\n(Real vs ScLinear)") + xlab("") +
theme(legend.position = "none") +
geom_label_repel(position = pos,
colour = "black", fill = "white", segment.colour="black",
min.segment.length = 0,
show.legend = TRUE, size = 1.5,
box.padding = 0.25, point.padding = 0) +
theme(axis.ticks.x = element_blank(),
axis.text.y = element_text(size = 8),
panel.spacing = unit(0.05, "lines"),
strip.text.x = element_text(size = 8)) +
facet_grid(~train_test) +
theme(panel.spacing = unit(c(0.1, 0.5, 0.1), "lines"))

if(save_figures){ggsave("./local/panels/Final/Main/2/Figure.2.d.r2.pdf" , width = 20/2, height = 29/3, units = 'cm')
write.table(df, "data/Figure.2.d.csv", sep = ",", col.names = TRUE, row.names = FALSE)}
Supplementary Figure 1 a
PBMC <- read.table("local/PBMC5K/bit_table_pbmc5k_2.csv", header = T, sep=',')
PBMC <- PBMC %>% dplyr::select(-c(rna_normalized, rna_raw))
PBMC <- na.omit(PBMC)
# deduplicate the separation of complexes that was used for RNA/ADT comparison
map <- list(
list(gexp = c("HLA-DRA", "HLA-DRB1", "HLA-DRB5"), adt = c("HLA-DR")),
list(gexp = c("CD3E", "CD3D", "CD3G"), adt = c("CD3")),
list(gexp = c("CD8A", "CD8B"), adt = c("CD8"))
)
for (m in map){
print(paste0("gexp: ", paste0(m$gexp, collapse = ","), " adt: ", paste0(m$adt, collapse = ",")))
gexp_names <- m$gexp
adt_name <- m$adt
for(gexp_name in gexp_names){
PBMC$gene[PBMC$gene == gexp_name] <- adt_name
}
}
## [1] "gexp: HLA-DRA,HLA-DRB1,HLA-DRB5 adt: HLA-DR"
## [1] "gexp: CD3E,CD3D,CD3G adt: CD3"
## [1] "gexp: CD8A,CD8B adt: CD8"
PBMC <- PBMC %>% distinct()
cors <- PBMC %>% group_by(gene) %>% dplyr::summarise(correlation = cor(real, predicted_sclinear))
ggplot(cors, aes(x=reorder(gene, correlation), y= correlation,fill=correlation)) + geom_bar(stat="identity", col="black") + coord_flip() +
theme_classic2() + scale_fill_gradientn(colours = inferno(11)) + ylab("Pearson\n(Real vs ScLinear)") + xlab("Protein") + theme(legend.position = "none") +
ggtitle("PBMC CITE-seq\n(5k cells)") + theme(plot.title = element_text(hjust = 0.5)) +
theme( axis.text.y = element_text(size = 8))

if(save_figures){ggsave("local/panels/Final/Supplementary/1/Supplementary.Figure.1.a.r2.pdf" , width = 20/3, height = 29/3, units = 'cm')
write.table(df, "data/Supplementary.Figure.1.a.csv", sep = ",", col.names = TRUE, row.names = FALSE)}
Reviever Figures
DF <- read.table("local/PBMC/bit_table_pbmc10k_2.csv", header = T, sep=',')
df <- DF %>% na.omit() %>% group_by(gene, adt_gene_names_real, adt_gene_names_predicted, rna_gene_names_normalized, rna_gene_names_raw) %>%
summarize('scLinear vs ADT' = cor(real, predicted_sclinear, method = "pearson"),
'RNA vs ADT' = cor(real, rna_normalized, method = "pearson"))
## `summarise()` has grouped output by 'gene', 'adt_gene_names_real',
## 'adt_gene_names_predicted', 'rna_gene_names_normalized'. You can override using
## the `.groups` argument.
df <- df %>%pivot_longer( cols = -c(gene, adt_gene_names_real, adt_gene_names_predicted, rna_gene_names_normalized, rna_gene_names_raw), values_to = "Pearson", names_to = "Comparison")
df$Comparison <- factor(df$Comparison, levels = rev(sort(unique(df$Comparison))))
df$gene_comp <- paste0(df$gene, "_", df$Comparison)
local_pal <- c("#68C6A4", "#A2A187")
df <- df %>% arrange(gene, Comparison)
## fix the one to many mapping
df$name <- df$gene
df <- df %>% mutate(name = ifelse(Comparison == "RNA vs ADT", rna_gene_names_normalized, adt_gene_names_real))
set.seed(3)
df$JitteredX <- jitter(rep(0, nrow(df)), amount = 0.2, factor = 1)
jitter_merge <- df %>% ungroup() %>% dplyr::select(name, Comparison, JitteredX)%>% group_by(name, Comparison) %>% summarise(JitteredX_new = mean(JitteredX))
## `summarise()` has grouped output by 'name'. You can override using the
## `.groups` argument.
df <- df %>% left_join(jitter_merge, by = c("name", "Comparison")) %>% mutate(JitteredX = JitteredX_new) %>% dplyr::select(-c(JitteredX_new)) %>% mutate(name2 = name)
df$name2[!((df$rna_gene_names_normalized %in% c("CD3", "CD19", "CD14", "CD56")) | (df$adt_gene_names_real %in% c("CD3", "CD19", "CD14", "CD56")))] <- ""
df_reduced <- df %>% ungroup() %>% dplyr::select(c(name, name2, Comparison, Pearson, JitteredX)) %>% distinct()
pos <- position_nudge(df_reduced$JitteredX)
pos2 <- position_nudge(df$JitteredX)
ggplot(df_reduced, aes(x=Comparison, y = Pearson, fill = Comparison, label = name2)) +
geom_boxplot(show.legend = FALSE, outlier.stroke = 0, outlier.size = 0) +
theme_bw() +
geom_point(position=pos, size = 0.9) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
scale_fill_manual(values = local_pal) +
ylab("Pearson") + xlab("") +
theme(legend.position = "none") +
geom_label_repel(position = pos,
colour = "black", fill = "white", segment.colour="black",
min.segment.length = 1,
show.legend = TRUE, size = 2.5,
box.padding = 0.25, point.padding = 0) +
theme(axis.ticks.x = element_blank(),
axis.text.y = element_text(size = 8),
panel.spacing = unit(0.05, "lines"),
strip.text.x = element_text(size = 7)) +
geom_line(data = df, aes(x = Comparison, y = Pearson, group = gene),position = pos2, size = 0.25 , color = "darkblue") +
ggtitle("PBMC10K")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?

if(save_figures){ggsave("local/panels/Final/Reviewer/1/C.PBMC10K_Pearson_RNA_correlation.r1.pdf", width = 20/2, height = 29/2, units = 'cm')}
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
B
DF <- read.table("local/PBMC5K/bit_table_pbmc5k_2.csv", header = T, sep=',')
df <- DF %>% na.omit() %>% group_by(gene, adt_gene_names_real, adt_gene_names_predicted, rna_gene_names_normalized, rna_gene_names_raw) %>%
summarize('scLinear vs ADT' = cor(real, predicted_sclinear, method = "pearson"),
'RNA vs ADT' = cor(real, rna_normalized, method = "pearson"))
## `summarise()` has grouped output by 'gene', 'adt_gene_names_real',
## 'adt_gene_names_predicted', 'rna_gene_names_normalized'. You can override using
## the `.groups` argument.
df <- df %>%pivot_longer( cols = -c(gene, adt_gene_names_real, adt_gene_names_predicted, rna_gene_names_normalized, rna_gene_names_raw), values_to = "Pearson", names_to = "Comparison")
df$Comparison <- factor(df$Comparison, levels = rev(sort(unique(df$Comparison))))
df$gene_comp <- paste0(df$gene, "_", df$Comparison)
df <- df %>% mutate(name = ifelse(Comparison == "RNA vs ADT", rna_gene_names_normalized, adt_gene_names_real))
set.seed(5)
df$JitteredX <- jitter(rep(0, nrow(df)), amount = 0.2, factor = 1)
jitter_merge <- df %>% ungroup() %>% dplyr::select(name, Comparison, JitteredX)%>% group_by(name, Comparison) %>% summarise(JitteredX_new = mean(JitteredX))
## `summarise()` has grouped output by 'name'. You can override using the
## `.groups` argument.
df <- df %>% left_join(jitter_merge, by = c("name", "Comparison")) %>% mutate(JitteredX = JitteredX_new) %>% dplyr::select(-c(JitteredX_new)) %>% mutate(name2 = name)
df$name2[!((df$rna_gene_names_normalized %in% c("CD3", "CD19", "CD14", "CD56")) | (df$adt_gene_names_real %in% c("CD3", "CD19", "CD14", "CD56")))] <- ""
df_reduced <- df %>% ungroup() %>% dplyr::select(c(name, name2, Comparison, Pearson, JitteredX)) %>% distinct()
pos <- position_nudge(df_reduced$JitteredX)
pos2 <- position_nudge(df$JitteredX)
ggplot(df_reduced, aes(x=Comparison, y = Pearson, fill = Comparison, label = name2)) +
geom_boxplot(show.legend = FALSE, outlier.stroke = 0, outlier.size = 0) +
theme_bw() +
geom_point(position=pos, size = 0.9) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
scale_fill_manual(values = local_pal) +
ylab("Pearson") + xlab("") +
theme(legend.position = "none") +
geom_label_repel(position = pos,
colour = "black", fill = "white", segment.colour="black",
min.segment.length = 1,
show.legend = TRUE, size = 2.5,
box.padding = 0.25, point.padding = 0) +
theme(axis.ticks.x = element_blank(),
axis.text.y = element_text(size = 8),
panel.spacing = unit(0.05, "lines"),
strip.text.x = element_text(size = 7)) +
geom_line(data = df, aes(x = Comparison, y = Pearson, group = gene),position = pos2, size = 0.25 , color = "darkblue") +
ggtitle("PBMC5K")
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?

if(save_figures){ggsave("local/panels/Final/Reviewer/1/B.PBMC5K_Pearson_RNA_correlation.r1.pdf", width = 20/2, height = 29/2, units = 'cm')}
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
C
mls_bit_table <- read.table("local/MouseLymphNodes/bit_table_mls_2.csv", header = T, sep=',')
mls_bit_table <- na.omit(mls_bit_table)
cors <- mls_bit_table %>% group_by(gene, sample) %>% dplyr::summarise(correlation = cor(real, predicted_sclinear))%>% arrange(desc(correlation))
## `summarise()` has grouped output by 'gene'. You can override using the
## `.groups` argument.
cors$gene <- factor(cors$gene, levels = unique(cors$gene))
local_pal <- c("#A2A187", "#525240", "#68C6A4", "#03878F")
cors$name <- ""
cors$name[cors$gene %in% c("CD3", "CD19", "CD27", "CD11b")] <- as.vector(cors$gene[cors$gene %in% c("CD3", "CD19", "CD27", "CD11b")])
pos <- position_jitter(width = 0.3, height = 0, seed = 6)
ggplot(cors, aes(x="", y = correlation, fill = sample, label = name)) +
geom_boxplot(show.legend = FALSE, outlier.stroke = 0, outlier.size = 0) +
theme_bw() +
geom_jitter(position=pos, size = 0.8) +
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
scale_fill_manual(values = local_pal) +
ylab("Pearson\n(Real vs ScLinear)") + xlab("") +
theme(legend.position = "none") +
geom_label_repel(position = pos,
colour = "black", fill = "white", segment.colour="black",
min.segment.length = 0,
show.legend = TRUE, size = 1.5,
box.padding = 0.25, point.padding = 0) +
theme(axis.ticks.x = element_blank(),
axis.text.y = element_text(size = 8),
panel.spacing = unit(0.05, "lines"),
strip.text.x = element_text(size = 7)) +
facet_grid(~sample)
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?

if(save_figures){ggsave("local/panels/Final/Reviewer/1/C.MLS_Pearson_correlation_boxplot_mls_cross_species.r1.pdf", width = 20/2, height = 29/2, units = 'cm')}
## Warning: The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## The following aesthetics were dropped during statistical transformation: label
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?